--- /dev/null
+Date: Thu, 9 Jan 2025 02:27:56 +0000
+Subject: batman-adv: fix panic during interface removal
+
+Reference counting is used to ensure that
+batadv_hardif_neigh_node and batadv_hard_iface
+are not freed before/during
+batadv_v_elp_throughput_metric_update work is
+finished.
+
+But there isn't a guarantee that the hard if will
+remain associated with a soft interface up until
+the work is finished.
+
+This fixes a crash triggered by reboot that looks
+like this:
+
+Call trace:
+ batadv_v_mesh_free+0xd0/0x4dc [batman_adv]
+ batadv_v_elp_throughput_metric_update+0x1c/0xa4
+ process_one_work+0x178/0x398
+ worker_thread+0x2e8/0x4d0
+ kthread+0xd8/0xdc
+ ret_from_fork+0x10/0x20
+
+(the batadv_v_mesh_free call is misleading,
+and does not actually happen)
+
+I was able to make the issue happen more reliably
+by changing hardif_neigh->bat_v.metric_work work
+to be delayed work. This allowed me to track down
+and confirm the fix.
+
+Fixes: 5c3245172c01 ("batman-adv: ELP - compute the metric based on the estimated throughput")
+ soft_iface]
+Origin: upstream, https://git.open-mesh.org/batman-adv.git/commit/1d7c8ac629678b8cd23ef9def7c7b111cb9e8ed5
+
+--- a/net/batman-adv/bat_v_elp.c
++++ b/net/batman-adv/bat_v_elp.c
+@@ -66,12 +66,19 @@ static void batadv_v_elp_start_timer(str
+ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
+ {
+ struct batadv_hard_iface *hard_iface = neigh->if_incoming;
++ struct net_device *soft_iface = hard_iface->soft_iface;
+ struct ethtool_link_ksettings link_settings;
+ struct net_device *real_netdev;
+ struct station_info sinfo;
+ u32 throughput;
+ int ret;
+
++ /* don't query throughput when no longer associated with any
++ * batman-adv interface
++ */
++ if (!soft_iface)
++ return BATADV_THROUGHPUT_DEFAULT_VALUE;
++
+ /* if the user specified a customised value for this interface, then
+ * return it directly
+ */
+@@ -141,7 +148,7 @@ static u32 batadv_v_elp_get_throughput(s
+
+ default_throughput:
+ if (!(hard_iface->bat_v.flags & BATADV_WARNING_DEFAULT)) {
+- batadv_info(hard_iface->soft_iface,
++ batadv_info(soft_iface,
+ "WiFi driver or ethtool info does not provide information about link speeds on interface %s, therefore defaulting to hardcoded throughput values of %u.%1u Mbps. Consider overriding the throughput manually or checking your driver.\n",
+ hard_iface->net_dev->name,
+ BATADV_THROUGHPUT_DEFAULT_VALUE / 10,
--- /dev/null
+Date: Wed, 22 Jan 2025 21:51:20 +0100
+Subject: batman-adv: Ignore neighbor throughput metrics in error case
+
+If a temporary error happened in the evaluation of the neighbor throughput
+information, then the invalid throughput result should not be stored in the
+throughtput EWMA.
+
+Origin: upstream, https://git.open-mesh.org/batman-adv.git/commit/ce8b40b7bdcc7f011191acda4c2d3067566eef54
+
+--- a/net/batman-adv/bat_v_elp.c
++++ b/net/batman-adv/bat_v_elp.c
+@@ -59,11 +59,13 @@ static void batadv_v_elp_start_timer(str
+ /**
+ * batadv_v_elp_get_throughput() - get the throughput towards a neighbour
+ * @neigh: the neighbour for which the throughput has to be obtained
++ * @pthroughput: calculated throughput towards the given neighbour in multiples
++ * of 100kpbs (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc).
+ *
+- * Return: The throughput towards the given neighbour in multiples of 100kpbs
+- * (a value of '1' equals 0.1Mbps, '10' equals 1Mbps, etc).
++ * Return: true when value behind @pthroughput was set
+ */
+-static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
++static bool batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh,
++ u32 *pthroughput)
+ {
+ struct batadv_hard_iface *hard_iface = neigh->if_incoming;
+ struct net_device *soft_iface = hard_iface->soft_iface;
+@@ -77,14 +79,16 @@ static u32 batadv_v_elp_get_throughput(s
+ * batman-adv interface
+ */
+ if (!soft_iface)
+- return BATADV_THROUGHPUT_DEFAULT_VALUE;
++ return false;
+
+ /* if the user specified a customised value for this interface, then
+ * return it directly
+ */
+ throughput = atomic_read(&hard_iface->bat_v.throughput_override);
+- if (throughput != 0)
+- return throughput;
++ if (throughput != 0) {
++ *pthroughput = throughput;
++ return true;
++ }
+
+ /* if this is a wireless device, then ask its throughput through
+ * cfg80211 API
+@@ -111,19 +115,24 @@ static u32 batadv_v_elp_get_throughput(s
+ * possible to delete this neighbor. For now set
+ * the throughput metric to 0.
+ */
+- return 0;
++ *pthroughput = 0;
++ return true;
+ }
+ if (ret)
+ goto default_throughput;
+
+- if (sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT))
+- return sinfo.expected_throughput / 100;
++ if (sinfo.filled & BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT)) {
++ *pthroughput = sinfo.expected_throughput / 100;
++ return true;
++ }
+
+ /* try to estimate the expected throughput based on reported tx
+ * rates
+ */
+- if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE))
+- return cfg80211_calculate_bitrate(&sinfo.txrate) / 3;
++ if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)) {
++ *pthroughput = cfg80211_calculate_bitrate(&sinfo.txrate) / 3;
++ return true;
++ }
+
+ goto default_throughput;
+ }
+@@ -142,8 +151,10 @@ static u32 batadv_v_elp_get_throughput(s
+ hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX;
+
+ throughput = link_settings.base.speed;
+- if (throughput && throughput != SPEED_UNKNOWN)
+- return throughput * 10;
++ if (throughput && throughput != SPEED_UNKNOWN) {
++ *pthroughput = throughput * 10;
++ return true;
++ }
+ }
+
+ default_throughput:
+@@ -157,7 +168,8 @@ default_throughput:
+ }
+
+ /* if none of the above cases apply, return the base_throughput */
+- return BATADV_THROUGHPUT_DEFAULT_VALUE;
++ *pthroughput = BATADV_THROUGHPUT_DEFAULT_VALUE;
++ return true;
+ }
+
+ /**
+@@ -169,15 +181,21 @@ void batadv_v_elp_throughput_metric_upda
+ {
+ struct batadv_hardif_neigh_node_bat_v *neigh_bat_v;
+ struct batadv_hardif_neigh_node *neigh;
++ u32 throughput;
++ bool valid;
+
+ neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v,
+ metric_work);
+ neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node,
+ bat_v);
+
+- ewma_throughput_add(&neigh->bat_v.throughput,
+- batadv_v_elp_get_throughput(neigh));
++ valid = batadv_v_elp_get_throughput(neigh, &throughput);
++ if (!valid)
++ goto put_neigh;
++
++ ewma_throughput_add(&neigh->bat_v.throughput, throughput);
+
++put_neigh:
+ /* decrement refcounter to balance increment performed before scheduling
+ * this task
+ */
--- /dev/null
+Date: Wed, 22 Jan 2025 21:51:21 +0100
+Subject: batman-adv: Drop unmanaged ELP metric worker
+
+The ELP worker needs to calculate new metric values for all neighbors
+"reachable" over an interface. Some of the used metric sources require
+locks which might need to sleep. This sleep is incompatible with the RCU
+list iterator used for the recorded neighbors. The initial approach to work
+around of this problem was to queue another work item per neighbor and then
+run this in a new context.
+
+Even when this solved the RCU vs might_sleep() conflict, it has a major
+problems: Nothing was stopping the work item in case it is not needed
+anymore - for example because one of the related interfaces was removed or
+the batman-adv module was unloaded - resulting in potential invalid memory
+accesses.
+
+Directly canceling the metric worker also has various problems:
+
+* cancel_work_sync for a to-be-deactivated interface is called with
+ rtnl_lock held. But the code in the ELP metric worker also tries to use
+ rtnl_lock() - which will never return in this case. This also means that
+ cancel_work_sync would never return because it is waiting for the worker
+ to finish.
+* iterating over the neighbor list for the to-be-deactivated interface is
+ currently done using the RCU specific methods. Which means that it is
+ possible to miss items when iterating over it without the associated
+ spinlock - a behaviour which is acceptable for a periodic metric check
+ but not for a cleanup routine (which must "stop" all still running
+ workers)
+
+The better approch is to get rid of the per interface neighbor metric
+worker and handle everything in the interface worker. The original problems
+are solved by:
+
+* creating a list of neighbors which require new metric information inside
+ the RCU protected context, gathering the metric according to the new list
+ outside the RCU protected context
+* only use rcu_trylock inside metric gathering code to avoid a deadlock
+ when the cancel_delayed_work_sync is called in the interface removal code
+ (which is called with the rtnl_lock held)
+
+Fixes: 5c3245172c01 ("batman-adv: ELP - compute the metric based on the estimated throughput")
+Origin: upstream, https://git.open-mesh.org/batman-adv.git/commit/405d49a20a205799ec453cadb3d078dc2808d563
+
+--- a/net/batman-adv/bat_v.c
++++ b/net/batman-adv/bat_v.c
+@@ -113,8 +113,6 @@ static void
+ batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh)
+ {
+ ewma_throughput_init(&hardif_neigh->bat_v.throughput);
+- INIT_WORK(&hardif_neigh->bat_v.metric_work,
+- batadv_v_elp_throughput_metric_update);
+ }
+
+ /**
+--- a/net/batman-adv/bat_v_elp.c
++++ b/net/batman-adv/bat_v_elp.c
+@@ -18,6 +18,7 @@
+ #include <linux/jiffies.h>
+ #include <linux/kernel.h>
+ #include <linux/kref.h>
++#include <linux/list.h>
+ #include <linux/minmax.h>
+ #include <linux/netdevice.h>
+ #include <linux/nl80211.h>
+@@ -26,6 +27,7 @@
+ #include <linux/rcupdate.h>
+ #include <linux/rtnetlink.h>
+ #include <linux/skbuff.h>
++#include <linux/slab.h>
+ #include <linux/stddef.h>
+ #include <linux/string.h>
+ #include <linux/types.h>
+@@ -42,6 +44,18 @@
+ #include "send.h"
+
+ /**
++ * struct batadv_v_metric_queue_entry - list of hardif neighbors which require
++ * and metric update
++ */
++struct batadv_v_metric_queue_entry {
++ /** @hardif_neigh: hardif neighbor scheduled for metric update */
++ struct batadv_hardif_neigh_node *hardif_neigh;
++
++ /** @list: list node for metric_queue */
++ struct list_head list;
++};
++
++/**
+ * batadv_v_elp_start_timer() - restart timer for ELP periodic work
+ * @hard_iface: the interface for which the timer has to be reset
+ */
+@@ -137,10 +151,17 @@ static bool batadv_v_elp_get_throughput(
+ goto default_throughput;
+ }
+
++ /* only use rtnl_trylock because the elp worker will be cancelled while
++ * the rntl_lock is held. the cancel_delayed_work_sync() would otherwise
++ * wait forever when the elp work_item was started and it is then also
++ * trying to rtnl_lock
++ */
++ if (!rtnl_trylock())
++ return false;
++
+ /* if not a wifi interface, check if this device provides data via
+ * ethtool (e.g. an Ethernet adapter)
+ */
+- rtnl_lock();
+ ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings);
+ rtnl_unlock();
+ if (ret == 0) {
+@@ -175,31 +196,19 @@ default_throughput:
+ /**
+ * batadv_v_elp_throughput_metric_update() - worker updating the throughput
+ * metric of a single hop neighbour
+- * @work: the work queue item
++ * @neigh: the neighbour to probe
+ */
+-void batadv_v_elp_throughput_metric_update(struct work_struct *work)
++static void
++batadv_v_elp_throughput_metric_update(struct batadv_hardif_neigh_node *neigh)
+ {
+- struct batadv_hardif_neigh_node_bat_v *neigh_bat_v;
+- struct batadv_hardif_neigh_node *neigh;
+ u32 throughput;
+ bool valid;
+
+- neigh_bat_v = container_of(work, struct batadv_hardif_neigh_node_bat_v,
+- metric_work);
+- neigh = container_of(neigh_bat_v, struct batadv_hardif_neigh_node,
+- bat_v);
+-
+ valid = batadv_v_elp_get_throughput(neigh, &throughput);
+ if (!valid)
+- goto put_neigh;
++ return;
+
+ ewma_throughput_add(&neigh->bat_v.throughput, throughput);
+-
+-put_neigh:
+- /* decrement refcounter to balance increment performed before scheduling
+- * this task
+- */
+- batadv_hardif_neigh_put(neigh);
+ }
+
+ /**
+@@ -273,14 +282,16 @@ batadv_v_elp_wifi_neigh_probe(struct bat
+ */
+ static void batadv_v_elp_periodic_work(struct work_struct *work)
+ {
++ struct batadv_v_metric_queue_entry *metric_entry;
++ struct batadv_v_metric_queue_entry *metric_safe;
+ struct batadv_hardif_neigh_node *hardif_neigh;
+ struct batadv_hard_iface *hard_iface;
+ struct batadv_hard_iface_bat_v *bat_v;
+ struct batadv_elp_packet *elp_packet;
++ struct list_head metric_queue;
+ struct batadv_priv *bat_priv;
+ struct sk_buff *skb;
+ u32 elp_interval;
+- bool ret;
+
+ bat_v = container_of(work, struct batadv_hard_iface_bat_v, elp_wq.work);
+ hard_iface = container_of(bat_v, struct batadv_hard_iface, bat_v);
+@@ -316,6 +327,8 @@ static void batadv_v_elp_periodic_work(s
+
+ atomic_inc(&hard_iface->bat_v.elp_seqno);
+
++ INIT_LIST_HEAD(&metric_queue);
++
+ /* The throughput metric is updated on each sent packet. This way, if a
+ * node is dead and no longer sends packets, batman-adv is still able to
+ * react timely to its death.
+@@ -340,16 +353,28 @@ static void batadv_v_elp_periodic_work(s
+
+ /* Reading the estimated throughput from cfg80211 is a task that
+ * may sleep and that is not allowed in an rcu protected
+- * context. Therefore schedule a task for that.
++ * context. Therefore add it to metric_queue and process it
++ * outside rcu protected context.
+ */
+- ret = queue_work(batadv_event_workqueue,
+- &hardif_neigh->bat_v.metric_work);
+-
+- if (!ret)
++ metric_entry = kzalloc(sizeof(*metric_entry), GFP_ATOMIC);
++ if (!metric_entry) {
+ batadv_hardif_neigh_put(hardif_neigh);
++ continue;
++ }
++
++ metric_entry->hardif_neigh = hardif_neigh;
++ list_add(&metric_entry->list, &metric_queue);
+ }
+ rcu_read_unlock();
+
++ list_for_each_entry_safe(metric_entry, metric_safe, &metric_queue, list) {
++ batadv_v_elp_throughput_metric_update(metric_entry->hardif_neigh);
++
++ batadv_hardif_neigh_put(metric_entry->hardif_neigh);
++ list_del(&metric_entry->list);
++ kfree(metric_entry);
++ }
++
+ restart_timer:
+ batadv_v_elp_start_timer(hard_iface);
+ out:
+--- a/net/batman-adv/bat_v_elp.h
++++ b/net/batman-adv/bat_v_elp.h
+@@ -10,7 +10,6 @@
+ #include "main.h"
+
+ #include <linux/skbuff.h>
+-#include <linux/workqueue.h>
+
+ int batadv_v_elp_iface_enable(struct batadv_hard_iface *hard_iface);
+ void batadv_v_elp_iface_disable(struct batadv_hard_iface *hard_iface);
+@@ -19,6 +18,5 @@ void batadv_v_elp_iface_activate(struct
+ void batadv_v_elp_primary_iface_set(struct batadv_hard_iface *primary_iface);
+ int batadv_v_elp_packet_recv(struct sk_buff *skb,
+ struct batadv_hard_iface *if_incoming);
+-void batadv_v_elp_throughput_metric_update(struct work_struct *work);
+
+ #endif /* _NET_BATMAN_ADV_BAT_V_ELP_H_ */
+--- a/net/batman-adv/types.h
++++ b/net/batman-adv/types.h
+@@ -596,9 +596,6 @@ struct batadv_hardif_neigh_node_bat_v {
+ * neighbor
+ */
+ unsigned long last_unicast_tx;
+-
+- /** @metric_work: work queue callback item for metric update */
+- struct work_struct metric_work;
+ };
+
+ /**